import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
df = pd.read_csv('/content/processed.csv')
df['Date'] = pd.to_datetime(df['Date'], format="%d-%m-%Y")
print(df['Date'].dtype)
df.index = df['Date']
df.drop('Date', axis=1, inplace=True)
datetime64[ns]
df
| Date | PM2.5 | CO2 | NO2 | NH3 | CO | SO2 | O3 | AQI |
|---|---|---|---|---|---|---|---|---|
| 2015-03-04 | 103.75 | 42.23 | 36.98 | 10.04 | 1.33 | 4.65 | 27.54 | 289 |
| 2015-04-01 | 106.27 | 33.67 | 38.34 | 11.53 | 1.08 | 6.22 | 27.65 | 206 |
| 2015-04-02 | 42.82 | 23.17 | 35.32 | 10.17 | 1.21 | 6.23 | 27.60 | 248 |
| 2015-04-03 | 45.57 | 0.49 | 37.37 | 0.25 | 0.81 | 9.23 | 68.48 | 111 |
| 2015-04-05 | 85.86 | 17.20 | 26.79 | 12.33 | 1.14 | 3.96 | 27.58 | 302 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-06-06 | 12.81 | 0.95 | 53.12 | 2.45 | 0.59 | 6.07 | 11.68 | 45 |
| 2021-06-07 | 11.81 | 0.95 | 40.84 | 2.49 | 0.57 | 6.04 | 15.42 | 34 |
| 2021-06-08 | 14.04 | 0.95 | 44.77 | 2.63 | 0.57 | 5.88 | 11.45 | 32 |
| 2021-06-09 | 16.26 | 0.95 | 49.22 | 2.01 | 0.61 | 6.19 | 10.09 | 41 |
| 2021-06-10 | 14.21 | 0.95 | 39.15 | 1.72 | 0.59 | 5.59 | 13.85 | 33 |

2224 rows × 8 columns
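The displayed index is not perfectly regular (for example, 2015-04-04 is missing between 2015-04-03 and 2015-04-05), so sliding windows built below will silently span gaps. A quick sanity-check sketch, using only pandas:

# Count the distinct day-to-day gaps in the index; anything other
# than 1 day marks a break that an 80-step window will straddle.
print(df.index.to_series().diff().value_counts().head())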
# Normalize the data to the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df)

# fit_transform already returns a numpy array
data = scaled_data
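Note that fitting the scaler on the full dataset lets test-period statistics leak into training. A minimal sketch of the leakage-free alternative, fitting on the training split only (not what the run below does):

# Fit the scaler on the first 80% of rows, then transform everything.
split = int(np.ceil(len(df) * 0.8))
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df.iloc[:split])   # min/max statistics from training rows only
data = scaler.transform(df)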
# Define the percentage of the dataset to be used for training
training_data_len = int(np.ceil(len(data) * 0.8))

# Create the training data
train_data = data[0:training_data_len, :]

# Split the data into x_train (80-step windows) and y_train (the next step)
x_train = []
y_train = []

for i in range(80, len(train_data)):
    x_train.append(train_data[i-80:i, :])
    y_train.append(train_data[i, :])

# Convert x_train and y_train to numpy arrays
x_train, y_train = np.array(x_train), np.array(y_train)
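The same windowing logic can be factored into a small helper; make_windows is a hypothetical name, not part of the original notebook:

def make_windows(series, window=80):
    # X[i] holds `window` consecutive rows; y[i] is the row right after.
    X = np.array([series[i - window:i] for i in range(window, len(series))])
    y = np.array([series[i] for i in range(window, len(series))])
    return X, y

# Equivalent to the loop above:
x_train, y_train = make_windows(train_data, window=80)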
# Create the test data (keep the last 80 training rows so the first
# test window is complete)
test_data = data[training_data_len - 80:, :]

# Split the data into x_test and y_test
x_test = []
y_test = data[training_data_len:, :]

# The window length must match training (80); starting the loop at a
# smaller index would build misaligned windows via negative indexing.
for i in range(80, len(test_data)):
    x_test.append(test_data[i-80:i, :])

# Convert x_test to a numpy array
x_test = np.array(x_test)
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# Build the stacked LSTM model
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=100, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=100))
model.add(Dropout(0.2))
model.add(Dense(units=50))
model.add(Dense(units=8))  # 8 output features

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')
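As an optional refinement (an assumption, not used in the run below), early stopping on validation loss would halt training once it stops improving and restore the best weights:

from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5,
                           restore_best_weights=True)
# Pass callbacks=[early_stop] to model.fit to enable it.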
# Train the model
history = model.fit(x_train, y_train, epochs=50, batch_size=32, validation_split=0.2)
Epoch 1/50
33/33 [==============================] - 16s 190ms/step - loss: 0.0155 - val_loss: 0.0074
Epoch 2/50
33/33 [==============================] - 6s 173ms/step - loss: 0.0101 - val_loss: 0.0070
Epoch 3/50
33/33 [==============================] - 5s 160ms/step - loss: 0.0096 - val_loss: 0.0067
Epoch 4/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0093 - val_loss: 0.0063
Epoch 5/50
33/33 [==============================] - 6s 191ms/step - loss: 0.0088 - val_loss: 0.0065
Epoch 6/50
33/33 [==============================] - 5s 150ms/step - loss: 0.0089 - val_loss: 0.0061
Epoch 7/50
33/33 [==============================] - 6s 169ms/step - loss: 0.0088 - val_loss: 0.0063
Epoch 8/50
33/33 [==============================] - 5s 159ms/step - loss: 0.0089 - val_loss: 0.0071
Epoch 9/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0085 - val_loss: 0.0060
Epoch 10/50
33/33 [==============================] - 6s 190ms/step - loss: 0.0083 - val_loss: 0.0058
Epoch 11/50
33/33 [==============================] - 6s 179ms/step - loss: 0.0082 - val_loss: 0.0062
Epoch 12/50
33/33 [==============================] - 10s 317ms/step - loss: 0.0084 - val_loss: 0.0070
Epoch 13/50
33/33 [==============================] - 5s 151ms/step - loss: 0.0080 - val_loss: 0.0061
Epoch 14/50
33/33 [==============================] - 8s 242ms/step - loss: 0.0079 - val_loss: 0.0069
Epoch 15/50
33/33 [==============================] - 6s 196ms/step - loss: 0.0079 - val_loss: 0.0063
Epoch 16/50
33/33 [==============================] - 7s 227ms/step - loss: 0.0077 - val_loss: 0.0066
Epoch 17/50
33/33 [==============================] - 5s 162ms/step - loss: 0.0077 - val_loss: 0.0064
Epoch 18/50
33/33 [==============================] - 7s 203ms/step - loss: 0.0076 - val_loss: 0.0060
Epoch 19/50
33/33 [==============================] - 6s 183ms/step - loss: 0.0075 - val_loss: 0.0066
Epoch 20/50
33/33 [==============================] - 6s 188ms/step - loss: 0.0074 - val_loss: 0.0064
Epoch 21/50
33/33 [==============================] - 5s 150ms/step - loss: 0.0075 - val_loss: 0.0065
Epoch 22/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0073 - val_loss: 0.0074
Epoch 23/50
33/33 [==============================] - 6s 196ms/step - loss: 0.0073 - val_loss: 0.0058
Epoch 24/50
33/33 [==============================] - 5s 149ms/step - loss: 0.0073 - val_loss: 0.0058
Epoch 25/50
33/33 [==============================] - 6s 190ms/step - loss: 0.0073 - val_loss: 0.0060
Epoch 26/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0071 - val_loss: 0.0058
Epoch 27/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0069 - val_loss: 0.0062
Epoch 28/50
33/33 [==============================] - 6s 187ms/step - loss: 0.0069 - val_loss: 0.0061
Epoch 29/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0068 - val_loss: 0.0056
Epoch 30/50
33/33 [==============================] - 6s 186ms/step - loss: 0.0067 - val_loss: 0.0054
Epoch 31/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0065 - val_loss: 0.0061
Epoch 32/50
33/33 [==============================] - 5s 162ms/step - loss: 0.0064 - val_loss: 0.0054
Epoch 33/50
33/33 [==============================] - 6s 176ms/step - loss: 0.0065 - val_loss: 0.0061
Epoch 34/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0065 - val_loss: 0.0053
Epoch 35/50
33/33 [==============================] - 6s 191ms/step - loss: 0.0064 - val_loss: 0.0056
Epoch 36/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0063 - val_loss: 0.0051
Epoch 37/50
33/33 [==============================] - 5s 158ms/step - loss: 0.0062 - val_loss: 0.0066
Epoch 38/50
33/33 [==============================] - 6s 178ms/step - loss: 0.0062 - val_loss: 0.0049
Epoch 39/50
33/33 [==============================] - 5s 149ms/step - loss: 0.0060 - val_loss: 0.0050
Epoch 40/50
33/33 [==============================] - 6s 189ms/step - loss: 0.0060 - val_loss: 0.0055
Epoch 41/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0060 - val_loss: 0.0051
Epoch 42/50
33/33 [==============================] - 5s 163ms/step - loss: 0.0061 - val_loss: 0.0044
Epoch 43/50
33/33 [==============================] - 6s 171ms/step - loss: 0.0059 - val_loss: 0.0052
Epoch 44/50
33/33 [==============================] - 5s 151ms/step - loss: 0.0059 - val_loss: 0.0052
Epoch 45/50
33/33 [==============================] - 6s 190ms/step - loss: 0.0059 - val_loss: 0.0052
Epoch 46/50
33/33 [==============================] - 5s 148ms/step - loss: 0.0058 - val_loss: 0.0053
Epoch 47/50
33/33 [==============================] - 6s 170ms/step - loss: 0.0057 - val_loss: 0.0053
Epoch 48/50
33/33 [==============================] - 5s 160ms/step - loss: 0.0058 - val_loss: 0.0058
Epoch 49/50
33/33 [==============================] - 5s 150ms/step - loss: 0.0057 - val_loss: 0.0055
Epoch 50/50
33/33 [==============================] - 6s 193ms/step - loss: 0.0057 - val_loss: 0.0053
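The history object records per-epoch losses, so the curves above can be visualized with a short sketch:

plt.figure(figsize=(8, 4))
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('MSE (scaled data)')
plt.legend()
plt.grid(True)
plt.show()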
# Get the predicted scaled values
predictions = model.predict(x_test)

# Unscale the predicted values
predictions = scaler.inverse_transform(predictions)

# Unscale the true values
y_test = scaler.inverse_transform(y_test)
14/14 [==============================] - 2s 42ms/step
# Per-column accuracy for all 8 columns, defined here as 100 - MAPE
from sklearn.metrics import mean_absolute_percentage_error

accuracy_scores = []
for i in range(8):
    accuracy = 100 - mean_absolute_percentage_error(y_test[:, i], predictions[:, i])
    accuracy_scores.append(accuracy)

for i, column in enumerate(df.columns):
    print(f"Accuracy for {column}: {accuracy_scores[i]:.2f}%")
Accuracy for PM2.5: 99.05%
Accuracy for CO2: 93.62%
Accuracy for NO2: 95.24%
Accuracy for NH3: 81.07%
Accuracy for CO: 99.06%
Accuracy for SO2: 99.58%
Accuracy for O3: 99.69%
Accuracy for AQI: 99.54%
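One caveat: scikit-learn's mean_absolute_percentage_error returns a fraction (0.05 for 5%), so subtracting it directly from 100 mixes units. A sketch of the same computation with consistent percent units:

# Convert the fractional MAPE to percent before subtracting.
for i, column in enumerate(df.columns):
    mape = mean_absolute_percentage_error(y_test[:, i], predictions[:, i])
    print(f"Accuracy for {column}: {100 * (1 - mape):.2f}%")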
# Plot actual vs. predicted values for all 8 parameters, with the
# date on the x-axis and the parameter value on the y-axis
fig, axes = plt.subplots(4, 2, figsize=(15, 15))
axes = axes.flatten()

# Dates corresponding to the test rows
test_dates = df.index[training_data_len:]

for i, column in enumerate(df.columns):
    # Plot the actual and predicted values
    axes[i].plot(test_dates, y_test[:, i], label='Actual', color='blue')
    axes[i].plot(test_dates, predictions[:, i], label='Predicted', color='orange')

    # Set the title and labels
    axes[i].set_title(f'{column} Prediction')
    axes[i].set_xlabel('Date')
    axes[i].set_ylabel(column)

    # Add legend and grid
    axes[i].legend()
    axes[i].grid(True)

# Adjust the spacing between subplots
plt.tight_layout()

# Show the plot
plt.show()
# Prepare the last 80 days of data for prediction (the window length
# must match the 80 steps the model was trained on)
last_window = data[-80:]

# Create an empty list to store the predictions
future_predictions = []

# Predict the next 30 days, feeding each prediction back in
for i in range(30):
    # Prepare the input data for prediction
    input_data = last_window[-80:]
    input_data = input_data.reshape((1, 80, input_data.shape[1]))

    # Make the prediction
    pred = model.predict(input_data)
    future_predictions.append(pred)

    # Append the prediction to the rolling window
    last_window = np.append(last_window, pred, axis=0)

# Convert the predictions to the original scale
future_predictions = np.array(future_predictions)
future_predictions = scaler.inverse_transform(future_predictions.reshape(-1, 8))

# Create a DataFrame for the predictions
future_dates = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=30)
future_df = pd.DataFrame(future_predictions, index=future_dates, columns=df.columns)

# Display the future predictions
print(future_df)
1/1 [==============================] - 0s 53ms/step
1/1 [==============================] - 0s 47ms/step
1/1 [==============================] - 0s 55ms/step
# Display the full future-prediction table
print(future_df.to_string())
PM2.5 PM10 NO NO2 NH3 CO SO2 O3 AQI
2021-06-11 60.002617 50.883610 19.367136 64.502838 125.114624 0.084118 10.370060 23.871712 111.281906
2021-06-12 63.107674 50.931423 19.667995 67.124435 139.685120 0.134839 10.653792 30.027687 126.617409
2021-06-13 65.961617 50.986816 20.320517 70.815575 164.583359 0.140377 10.899661 34.855602 136.784073
2021-06-14 68.732407 50.799286 21.489349 75.738174 194.399628 0.135283 11.183870 37.846966 143.676620
2021-06-15 71.227882 50.195274 23.231794 82.042854 226.994980 0.156221 11.501744 39.069695 148.632263
2021-06-16 73.960976 49.040543 25.583351 90.032005 262.923340 0.227048 11.918674 39.181587 154.310989
2021-06-17 77.498848 47.231152 28.616997 100.117615 303.442413 0.368728 12.510146 38.920353 163.206650
2021-06-18 82.624298 44.706070 32.421143 112.722176 350.208984 0.594957 13.374964 38.808331 177.737350
2021-06-19 89.576965 41.350803 37.298241 128.602509 404.635071 0.922864 14.569669 39.044239 199.103348
2021-06-20 98.596283 37.118710 43.544445 148.375626 468.300812 1.359627 16.160398 39.463409 228.189209
2021-06-21 109.853798 32.161362 51.309780 172.105316 542.640137 1.891059 18.229544 39.756248 265.564972
2021-06-22 122.630562 26.895773 60.530247 198.991699 627.387085 2.482280 20.787415 39.576397 309.820312
2021-06-23 134.973038 21.945454 70.834549 227.198822 719.213440 3.081508 23.706568 38.524033 356.210358
2021-06-24 145.167496 17.976177 81.305817 253.867737 812.449280 3.627386 26.778667 36.305920 399.092255
2021-06-25 152.073761 15.283709 91.029396 276.791962 900.324036 4.099170 29.735008 32.934376 434.620575
2021-06-26 156.174484 13.837508 99.263039 294.902496 977.809387 4.492974 32.430622 28.939962 463.200653
2021-06-27 158.393494 13.308738 105.696167 308.313965 1042.065552 4.834368 34.739471 24.877388 487.164734
2021-06-28 159.257050 13.299580 110.490067 317.894501 1092.751587 5.136467 36.607742 21.063507 507.349152
2021-06-29 159.326279 13.462250 113.893105 324.476257 1131.428955 5.394811 38.061085 17.656536 523.816406
2021-06-30 158.959076 13.604235 116.233215 328.870331 1160.343018 5.599116 39.177837 14.746899 536.305908
2021-07-01 158.314667 13.566515 117.790405 331.685333 1181.632690 5.753644 40.009975 12.268852 544.747253
2021-07-02 157.514328 13.283374 118.787514 333.384064 1197.182373 5.864694 40.614212 10.147185 549.482605
2021-07-03 156.795959 12.850793 119.387878 334.329285 1208.620361 5.927744 41.081924 8.407581 551.290283
2021-07-04 156.228821 12.299986 119.707817 334.758209 1217.040161 5.951909 41.451111 7.002137 550.914917
2021-07-05 155.806351 11.670095 119.844559 334.858124 1223.279541 5.947795 41.745251 5.861260 548.983765
2021-07-06 155.543106 11.004136 119.859505 334.750854 1227.961426 5.923184 41.986279 4.943182 546.092773
2021-07-07 155.424286 10.335439 119.799706 334.524231 1231.533691 5.885419 42.187820 4.207241 542.689697
2021-07-08 155.445999 9.693343 119.691864 334.229004 1234.319092 5.838931 42.363049 3.623223 539.110535
2021-07-09 155.582382 9.081307 119.555115 333.894836 1236.534912 5.788843 42.515244 3.160630 535.573608
2021-07-10 155.805588 8.510557 119.407600 333.550507 1238.335205 5.738425 42.648857 2.795202 532.205505
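Because each step of the loop feeds the model's own output back in as input, errors compound over the 30-day horizon, which is consistent with the strong drift visible in the later rows above. A quick sketch to visualize the forecast:

# One subplot per column over the 30 forecast dates.
future_df.plot(subplots=True, layout=(4, 2), figsize=(15, 10))
plt.tight_layout()
plt.show()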
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Calculate the evaluation metrics for all 8 columns
for i in range(8):
    mae = mean_absolute_error(y_test[:, i], predictions[:, i])
    mse = mean_squared_error(y_test[:, i], predictions[:, i])
    rmse = np.sqrt(mse)
    print(f"Column: {df.columns[i]}")
    print("Mean Absolute Error (MAE):", mae)
    print("Mean Squared Error (MSE):", mse)
    print("Root Mean Squared Error (RMSE):", rmse)
    print()
Column: PM2.5
Mean Absolute Error (MAE): 20.543749164482495
Mean Squared Error (MSE): 706.9251868186798
Root Mean Squared Error (RMSE): 26.58806474376576
Column: CO2
Mean Absolute Error (MAE): 6.656599771826116
Mean Squared Error (MSE): 122.15371069521923
Root Mean Squared Error (RMSE): 11.052316983113506
Column: NO2
Mean Absolute Error (MAE): 8.842603219178347
Mean Squared Error (MSE): 210.01329653616122
Root Mean Squared Error (RMSE): 14.491835513010807
Column: NH3
Mean Absolute Error (MAE): 42.03412670572062
Mean Squared Error (MSE): 2395.370115467304
Root Mean Squared Error (RMSE): 48.942518483086914
Column: CO
Mean Absolute Error (MAE): 0.41463548268606837
Mean Squared Error (MSE): 0.41688352785812216
Root Mean Squared Error (RMSE): 0.645665182473178
Column: SO2
Mean Absolute Error (MAE): 2.070023189506552
Mean Squared Error (MSE): 8.293230722098933
Root Mean Squared Error (RMSE): 2.8797969932095793
Column: O3
Mean Absolute Error (MAE): 8.264868471187514
Mean Squared Error (MSE): 138.16901845005188
Root Mean Squared Error (RMSE): 11.754531826068272
Column: AQI
Mean Absolute Error (MAE): 29.071826065714294
Mean Squared Error (MSE): 1636.6938253224598
Root Mean Squared Error (RMSE): 40.45607278669619
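Since the eight columns live on very different scales, the absolute errors above are hard to compare across columns; R² is scale-free. A sketch using scikit-learn's r2_score (not part of the original run):

from sklearn.metrics import r2_score

for i, column in enumerate(df.columns):
    r2 = r2_score(y_test[:, i], predictions[:, i])
    print(f"R² for {column}: {r2:.3f}")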